#Import the required libraries
import pandas as pd
import altair as alt
#Import the greenhouse gases data. This dataset contains greenhouse gases emission data from 2012 to 2021
greenhouse_data= pd.read_csv("https://raw.githubusercontent.com/davidabney9-dev/GHGProject/main/Datasets/GHG_Reporting_Program_Publication.csv")
greenhouse_data.head()
| Reporter | Year | Sector | Subsector | Primary NAICS Code | NAICS Definition | Parent Company | City | County | Jurisdiction | ... | Notes | Point Size | Location | Location duplicate | Latitude | Longitude | New Georeferenced Column | Counties | Congressional Districts | WAOFM - GIS - Legislative District Boundary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alcoa Intalco Works - Ferndale | 2021 | Metals | Aluminum Production | 331312 | Primary Aluminum Production | Alcoa Corp. | Ferndale | Whatcom | Ecology: Industrial Section | ... | Curtailed mid 2020. | 1 | (48.8455, -122.7055) | (48.8455, -122.7055) | 48.845500 | -122.705500 | POINT (-122.7055 48.8455) | 2744.0 | 1.0 | 44.0 |
| 1 | Avista Corporation - supplier | 2021 | Supplier | Natural Gas Supplier | 4911 | NaN | NaN | Spokane | Spokane | Spokane Regional Clean Air Agency | ... | Currently Undergoing Verification. | 4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | Frito Lay - Vancouver | 2021 | Food Production | Other Food Products | 311919 | Other Snack Food Manufacturing | PepsiCo Inc | Vancouver | Clark | Southwest Clean Air Agency | ... | NaN | 1 | (45.657037, -122.692) | (45.657037, -122.692) | 45.657037 | -122.692000 | POINT (-122.692 45.657037) | 2977.0 | 3.0 | 8.0 |
| 3 | Georgia-Pacific Consumer Operations LLC - Camas | 2017 | Pulp and Paper | Kraft Mills | 322121 | Paper (except Newsprint) Mills | Koch Industries Inc. | Camas | Clark | Ecology: Industrial Section | ... | NaN | 3 | (45.584271, -122.40843) | (45.584271, -122.40843) | 45.584271 | -122.408430 | POINT (-122.40843 45.584271) | 2977.0 | 3.0 | 25.0 |
| 4 | Interfor US, Inc. - Port Angeles | 2017 | Wood Products | Lumber Mills | 321113 | Sawmills | Interfor US Inc. | Port Angeles | Clallam | Olympic Region Clean Air Agency | ... | NaN | 2 | (48.103425, -123.518642) | (48.103425, -123.518642) | 48.103425 | -123.518642 | POINT (-123.518642 48.103425) | 2976.0 | 6.0 | 30.0 |
5 rows × 29 columns
#Find the shape of the data
greenhouse_data.shape
(1608, 29)
# Defining Custom Themes for the plots
def custom_theme():
return {
'config': {
'view': {
'height': 400,
'width': 600,
},
"axis": {
"labelFontSize": 14,
"titleFontSize": 16,
},
"title": {
# "color": "LightSeaGreen",
"fontSize": 20
},
"legend": {
"direction": "vertical",
"labelFontSize": 12,
"titleFontSize": 14
}
}
}
# register the custom theme under a chosen name
alt.themes.register('custom_theme', custom_theme)
# enable the newly registered theme
alt.themes.enable('custom_theme')
ThemeRegistry.enable('custom_theme')
#Creating a bar graph to display sector wise emissions in WA state.
#Grouping the total emissions by Sector and calculating the percentage of emissions in each sector.
bar_chart = alt.Chart(greenhouse_data).mark_bar(filled=True).transform_aggregate(
tot_b ='count()',
Total ='sum(Total Emissions (MTCO2e))',
groupby=['Sector']).transform_window(
TotalAll='sum(Total)',
frame=[None, None]
).transform_calculate(
#Percentage Calculation
perct='datum.Total / datum.TotalAll',
perct_b='(datum.tot_b/datum.TotalAll)*10000')
#Plot the bar graph by specifying the x,y position encodings
emmisson_perct = bar_chart.mark_bar().encode(
alt.Y('Sector:N',sort=alt.EncodingSortField(
op='max', field='perct', order='descending'
)),
alt.X('perct:Q',axis=alt.Axis(format='%'),title='Emission Rate')
).properties(
width=400,
height=400,
title="Sector-Specific Greenhouse Gas Emissions in WA State")
#Adding text encoding to the bar graph and aliging the text
text_label = emmisson_perct.mark_text(align='left', baseline='middle', dx=5, dy=-5).encode(
alt.Text('perct:Q',format=".0%"),color=alt.value('black')
)
#Layer the bar graph with the text encoding
alt.layer(emmisson_perct,text_label).configure_title(
fontSize=20).configure_view(
stroke='transparent').configure_axis(labelFontSize=12)
It can be observed that “Power Plants” are the main contributors to greenhouse gas emissions followed by “Pulp and Paper”, “Petroleum Systems”, and “Suppliers”.
#Creating a bar graph to display county wise emissions in WA state.
#Grouping the total emissions by County and calculating the percentage of emissions in each county.
bar_graph= alt.Chart(greenhouse_data).mark_bar().transform_aggregate(
tot_b ='count()',
Total ='sum(Total Emissions (MTCO2e))',
groupby=['County']).transform_window(
TotalAll='sum(Total)',
Rank='rank(Total)',
sort=[alt.SortField('Total', order='descending')],
frame=[None, None]
).transform_calculate(
#Percentage Calculation
perct='datum.Total / datum.TotalAll').transform_filter(
'datum.Rank < 10').encode(
#Plot the bar graph by specifying the x,y position encodings
alt.Y('County:N',sort=alt.EncodingSortField(op='max', field='perct',order='descending')),
alt.X('perct:Q',axis=alt.Axis(format='%'),title='Emission Rate')
).properties(
width=400,
height=400,
title='County-Level Analysis of Greenhouse Gas Emissions'
)
#Adding text encoding to the bar graph and aliging the text
text_label = bar_graph.mark_text(align='left', baseline='middle', dx=5, dy=1).encode(
alt.Text('perct:Q',format=".0%"),color=alt.value('black')
)
#Layer the bar graph with the text encoding
alt.layer(bar_graph+text_label)
Lewis has the highest rate of greenhouse emissions, followed by King, Whatcom, and Cowlitz.
#Creating a line graph to display the greenhouse gases emission trend in WA state.
line_graph= alt.Chart(greenhouse_data).mark_line(point=True).encode(
alt.X('Year:N',title=''),
#Change the Y axis scale to display in millions
alt.Y('sum(Total Emissions (MTCO2e)):Q', axis=alt.Axis(format='s', labelExpr="datum.label"),scale=alt.Scale(zero=False),title='Total Emissions (MTCO2e)')
).transform_calculate(
label="datum.value / 1000000"
).properties(
title='Greenhouse Gas Emission Trend'
)
#Create a scatter plot to highlight that 2019 had highest emissions
#Highlight that specific datapoint in red
scatter = alt.Chart(greenhouse_data).mark_point(color='red',size=20).transform_filter(
'datum.Year == 2019'
).encode(
alt.X('Year:N',title=''),
alt.Y('sum(Total Emissions (MTCO2e)):Q', axis=alt.Axis(format='s', labelExpr="datum.label"), scale=alt.Scale(zero=False),title='Total Emissions (MTCO2e)')
).transform_calculate(
label="datum.value / 1000000"
)
#Adding text encoding to the line graph and aliging the text
text = alt.Chart(greenhouse_data).mark_text(dx=-5,dy=-10,text='44.4M',fontSize=13,align='left', baseline='middle').transform_filter(
'datum.Year == 2019'
).encode(
alt.X('Year:N',title=''),
alt.Y('sum(Total Emissions (MTCO2e)):Q', axis=alt.Axis(format='s', labelExpr="datum.label"), scale=alt.Scale(zero=False),title='Total Emissions (MTCO2e)')
).transform_calculate(
label="datum.value / 1000000"
)
scatter = scatter + text
#Layer the line graph with the scatter plot(point with text encoding)
alt.layer(line_graph,scatter).configure_axis(
labelAngle=0
)
It can be observed that the emissions have increased by 61% from 2012 – 2019, before declining to 38.1M emissions in 2021.
Creating a multi line graph in different colors to display the greenhouse gases emissions trend in top 5 sectors in WA state.
To avoid overlapping of text encoding in the multi line graph. Creating separate plots and aligning then individually.
Finally layering all plots into a single multi-line graph.
#This is a multi line graph for 'Power Plants', 'Supplier'sectors
base_chart = alt.Chart(greenhouse_data).mark_line(point=True,
strokeWidth=3,
opacity=0.7
).transform_filter(
#Filter specific sectors
alt.FieldOneOfPredicate(field='Sector', oneOf=['Power Plants', 'Supplier'])
).encode(
alt.X('Year:N',title=''),
#Change the Y axis scale to display in millions
alt.Y('sum(Total Emissions (MTCO2e)):Q', axis=alt.Axis(format='s', labelExpr="datum.label"),title='Total Emissions (MTCO2e)'),
alt.Color('Sector:N',legend=None),
).transform_calculate(
label="datum.value / 1000000"
).properties(
title='Total Emissions trend in top 5 sectors'
)
#Adding text encoding to the line graph and aliging the text
text_labels = base_chart.mark_text(dx=5,dy=-78,align='left', baseline='middle').transform_filter(
alt.FieldOneOfPredicate(field='Sector', oneOf=['Power Plants', 'Supplier'])
).encode(
#Aligning the text such that the text is added at the end of the line
x='max(Year):N',
y='max(Total Emissions (MTCO2e)):Q',
text='Sector:N',
#Assigning unique colors to 'Power Plants', 'Supplier'sectors
color='Sector:N'
)
#Combine the line graph with the text encoding
plot1 = base_chart + text_labels
#This is a line graph for 'Petroleum Systems' sector
base_chart = alt.Chart(greenhouse_data).mark_line(point=True,
strokeWidth=3,
opacity=0.7
).transform_filter(
alt.FieldOneOfPredicate(field='Sector', oneOf=['Petroleum Systems'])
).encode(
alt.X('Year:N',title=''),
alt.Y('sum(Total Emissions (MTCO2e)):Q' ,axis=alt.Axis(format='s', labelExpr="datum.label")),
#Assigning unique color to 'Petroleum Systems' sector
color=alt.value('goldenrod')
).transform_calculate(
label="datum.value / 1000000"
).properties(
width=600,
height=400
)
text_labels = base_chart.mark_text(dx=10,dy=-100,align='left', baseline='middle').transform_filter(
alt.FieldOneOfPredicate(field='Sector', oneOf=['Petroleum Systems'])
).encode(
x='max(Year):N',
y='max(Total Emissions (MTCO2e)):Q',
text='Sector:N',
color=alt.value('goldenrod')
)
#Combine the line graph with the text encoding
plot2 = base_chart + text_labels
#This is a line graph for 'Pulp and Paper' sector
base_chart = alt.Chart(greenhouse_data).mark_line(point=True,
strokeWidth=3,
opacity=0.7
).transform_filter(
alt.FieldOneOfPredicate(field='Sector', oneOf=['Pulp and Paper'])
).encode(
alt.X('Year:N',title=''),
alt.Y('sum(Total Emissions (MTCO2e)):Q' ,axis=alt.Axis(format='s', labelExpr="datum.label")),
#Assigning unique color to 'Pulp and Paper' sector
color=alt.value('seagreen')
).transform_calculate(
label="datum.value / 1000000"
).properties(
)
text_labels = base_chart.mark_text(dx=10,dy=-125,align='left', baseline='middle').transform_filter(
alt.FieldOneOfPredicate(field='Sector', oneOf=['Pulp and Paper'])
).encode(
x='max(Year):N',
y='max(Total Emissions (MTCO2e)):Q',
text='Sector:N',
color=alt.value('seagreen')
)
#Combine the line graph with the text encoding
plot3 = base_chart + text_labels
#This is a line graph for 'Metals' sector
base_chart = alt.Chart(greenhouse_data).mark_line(point=True,
strokeWidth=3,
opacity=0.7
).transform_filter(
alt.FieldOneOfPredicate(field='Sector', oneOf=['Metals'])
).encode(
alt.X('Year:N',title=''),
alt.Y('sum(Total Emissions (MTCO2e)):Q' ,axis=alt.Axis(format='s', labelExpr="datum.label")),
#Assigning unique color to 'Metals' sector
color=alt.value('grey')
).transform_calculate(
label="datum.value / 1000000"
)
text_labels = alt.Chart(greenhouse_data).mark_text(dx=7,dy=-10,align='left', baseline='middle').transform_filter(
alt.FieldOneOfPredicate(field='Sector', oneOf=['Metals'])
).encode(
x='max(Year):N',
y='average(Total Emissions (MTCO2e)):Q',
text='Sector:N',
color=alt.value('grey')
#color='Sector:N'
)
#Combine the line graph with the text encoding
plot4 = base_chart + text_labels
#Layer all the graphs and create a single plot that shows the total emissions trend in top 5 sectors.
alt.layer(plot1,plot2,plot3,plot4).configure_axis(
labelAngle=0
).configure_view(stroke='transparent')
It can be seen that “Power Plants” are the major contributors to emissions followed by Suppliers, and Pulp and Paper. It can be observed that the emissions from power plants increased between 2012-2018 and peaked in 2019, and declined in 2020 and 2021, potentially due to COVID-19. While emission trends of the Supplier, Pulp and Paper, and Petroleum Systems sectors remain consistent, the emissions from Metals tend to decline over the last decade.
Creating a multi line graph in different colors to display the Greenhouse Gas Emissions by Gas trend in WA state.
Since the dataset contains information of Carbon Dioxide (MTCO2e),Biogenic Carbon Dioxide (MTCO2e),PFCs (MTCO2e),Methane (MTCO2e) in separate columns. Creating separate plots and layering them together into a multi-line graph at the end.
#This is a line graph to display the Carbon Dioxide (MTCO2e) emission trend
co2_line = alt.Chart(greenhouse_data).mark_line(point=True,
strokeWidth=3
).encode(
alt.X('Year:N'),
alt.Y('mean(Carbon Dioxide (MTCO2e)):Q',title='Mean of Emissions (MTCO2e)',axis=alt.Axis(format='s', labelExpr="datum.label")),
color=alt.value('steelblue')
).transform_calculate(
label="datum.value / 1000000"
).properties(
width=600,
height=400
)
co2_text = alt.Chart(greenhouse_data).mark_text(dx=7,dy=-5,text='CO2',fontSize=13,align='left', baseline='middle').encode(
x='max(Year):N',
y='mean(Carbon Dioxide (MTCO2e)):Q',
#text='CO2',
#color=alt.value('lightblue')
)
co2 = co2_line + co2_text
#This is a line graph to display the Biogenic Carbon Dioxide (MTCO2e) emission trend
biogenic_line = alt.Chart(greenhouse_data).mark_line(point=True,
strokeWidth=3
).encode(
alt.X('Year:N'),
alt.Y('mean(Biogenic Carbon Dioxide (MTCO2e)):Q',axis=alt.Axis(format='s', labelExpr="datum.label")),
color=alt.value('orange')
).transform_calculate(
label="datum.value / 1000000"
).properties(
width=600,
height=400
)
biogenic_text = alt.Chart(greenhouse_data).mark_text(dx=7,dy=5,text='Biogenic CO2',fontSize=13,align='left', baseline='middle').encode(
x='max(Year):N',
y='mean(Biogenic Carbon Dioxide (MTCO2e)):Q',
#text='CO2',
#color=alt.value('orange')
)
biogenic = biogenic_line + biogenic_text
#This is a line graph to display the PFCs (MTCO2e) emission trend
pfc_line = alt.Chart(greenhouse_data).mark_line(point=True,
strokeWidth=3
).encode(
alt.X('Year:N',title=""),
alt.Y('mean(PFCs (MTCO2e)):Q',axis=alt.Axis(format='s', labelExpr="datum.label")),
color=alt.value('darkseagreen')
).transform_calculate(
label="datum.value / 1000000"
)
pfc_text = alt.Chart(greenhouse_data).mark_text(dx=7,dy=2,text='PFCs',fontSize=13,align='left', baseline='middle').encode(
x='max(Year):N',
y='mean(PFCs (MTCO2e)):Q',
#text='CO2',
#color=alt.value('darkseagreen')
)
pfc = pfc_line + pfc_text
#This is a line graph to display the Methane (MTCO2e) emission trend
methane_line = alt.Chart(greenhouse_data).mark_line(point=True,
strokeWidth=3
).encode(
alt.X('Year:N'),
alt.Y('mean(Methane (MTCO2e)):Q',axis=alt.Axis(format='s', labelExpr="datum.label")),
color=alt.value('grey')
).transform_calculate(
label="datum.value / 1000000"
)
methane_text = alt.Chart(greenhouse_data).mark_text(dx=7,dy=-10,text='Methane',fontSize=13,align='left', baseline='middle').encode(
x='max(Year):N',
y='mean(Methane (MTCO2e)):Q',
#text='CO2',
#color=alt.value('grey')
)
methane = methane_line + methane_text
#Layer all the graphs and create a single plot that shows the Greenhouse Emissions by Gas
alt.layer(co2,biogenic,pfc,methane).properties(
title='Emissions by Gas Type'
).configure_axis(
labelAngle=0
).configure_view(stroke='transparent')
It can be seen that CO2 emissions are significantly higher in WA compared to the other gases. Over the last decade, it can be observed that CO2 emissions have increased by approximately 80%. While the emissions of Biogenic CO2, methane, and PFCs remain consistent.
#For Carbon Dioxide
line = alt.Chart(greenhouse_data).mark_line().encode(
alt.X('Year:N',title=''),
alt.Y('mean(Carbon Dioxide (MTCO2e)):Q',title='Emission (MTCO2e)'),
)
text = alt.Chart(greenhouse_data).mark_text(dx=7,dy=-5,text='CO2',fontSize=13,align='left', baseline='middle').encode(
x='max(Year):N',
y='mean(Carbon Dioxide (MTCO2e)):Q',
#color=alt.value('steelblue')
)
line = line + text
band = alt.Chart(greenhouse_data).mark_errorband(extent='ci').encode(
alt.X('Year:N',title=''),
alt.Y('Carbon Dioxide (MTCO2e):Q',title='')
)
layer1=line+band
#For Biogenic Carbon Dioxide
line = alt.Chart(greenhouse_data).mark_line(color='orange').encode(
alt.X('Year:N'),
alt.Y('mean(Biogenic Carbon Dioxide (MTCO2e)):Q'),
)
text = alt.Chart(greenhouse_data).mark_text(dx=7,dy=-5,text='Biogenic CO2',fontSize=13,align='left', baseline='middle').encode(
x='max(Year):N',
y='mean(Biogenic Carbon Dioxide (MTCO2e)):Q',
#color=alt.value('orange')
)
line = line + text
band = alt.Chart(greenhouse_data).mark_errorband(extent='ci',color='orange').encode(
alt.X('Year:N',title=''),
alt.Y('Biogenic Carbon Dioxide (MTCO2e):Q',title='')
)
layer2=line+band
alt.layer(layer1,layer2).properties(
width=600,
height=400,title='Greenhouse Gas Emissions by Gas, 2012–2021'
).configure_axis(
labelAngle=0
).configure_view(stroke='transparent')
Since CO2 and Biogenic CO2 emissions are higher compared to other gases. A line chart with a confidence interval band is used to show the variability associated with the emissions of the gas between the years 2012-2021.
Creating a line graph to visualize the trends in the energy consumption in Washington State for the year 2012 to 2021
The line graph is layered with the text for the accessibility
import pandas as pd
file_path = "/Users/kat/Documents/SeattleU/Fall 23/Data Viz/Project/Renewable_Energy_Data.xlsx"
energy_sources = ['Biomass', 'Geothermal', 'Hydropower', 'Solar Energy', 'Wind Energy']
# Create an empty DataFrame to store the results
energy_data = pd.DataFrame()
# Loop through each energy source
for source in energy_sources:
# Read the Excel file for the current energy source
data = pd.read_excel(file_path, source)
# Set column names to the second row
data.columns = data.iloc[1]
# Filter data for the state of Washington
data = data[data.State == "WA"]
# Rename columns and convert to the appropriate data types
data.columns = ['State'] + [int(col) for col in data.columns[1:]]
data[data.columns[1:]] = data[data.columns[1:]].astype('int')
data = data.rename(columns={'State': 'Energy'})
data['Energy'] = data.Energy.replace('WA', source)
# Append the current energy source data to the overall DataFrame
energy_data = energy_data.append(data)
# Reset the index of the final DataFrame
energy_data = energy_data.reset_index(drop=True)
# Melt the DataFrame to reshape it
combined_df = pd.melt(energy_data, id_vars=['Energy'], var_name='Year', value_name='Energy_Consumption(Btu)')
# Convert the 'Year' column to integers
combined_df['Year'] = combined_df['Year'].astype(int)
# Filter rows based on the specified list of years
selected_years = [2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
combined_df = combined_df[combined_df['Year'].isin(selected_years)]
combined_df.head(5)
<ipython-input-77-f8ce568229bb>:27: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. energy_data = energy_data.append(data) <ipython-input-77-f8ce568229bb>:27: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. energy_data = energy_data.append(data) <ipython-input-77-f8ce568229bb>:27: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. energy_data = energy_data.append(data) <ipython-input-77-f8ce568229bb>:27: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. energy_data = energy_data.append(data) <ipython-input-77-f8ce568229bb>:27: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. energy_data = energy_data.append(data)
| Energy | Year | Energy_Consumption(Btu) | |
|---|---|---|---|
| 260 | Biomass | 2012 | 121432 |
| 261 | Geothermal | 2012 | 1136 |
| 262 | Hydropower | 2012 | 851343 |
| 263 | Solar Energy | 2012 | 266 |
| 264 | Wind Energy | 2012 | 62803 |
#This is a line graph for 'Hydropower','Biomass','Wind Energy','Geothermal'
base_chart = alt.Chart(combined_df).mark_line(point=True,
strokeWidth=3,
opacity=0.7
).transform_filter(
alt.FieldOneOfPredicate(field='Energy', oneOf=['Hydropower','Biomass','Wind Energy','Geothermal'])
).encode(
alt.X('Year:O',title=""),alt.Y('Energy_Consumption(Btu):Q',scale=alt.Scale(type='log'),title="Energy Consumption (Billion Btu)"),
alt.Color('Energy:N',legend=None)).properties(
title="Renewable Energy Consumption Over Time",
width=600,
height=400
)
text_labels = base_chart.mark_text(dx=3,align='left', baseline='middle').transform_filter(
alt.FieldOneOfPredicate(field='Energy', oneOf=['Hydropower','Biomass','Wind Energy','Geothermal'])
).encode(
x='max(Year):N',
y='average(Energy_Consumption(Btu)):Q',
text='Energy:N',
color=alt.value('black')
)
plot1 = (base_chart+text_labels)
#This is a line graph for 'Solar power'
line_chart = alt.Chart(combined_df).mark_line(point=True).transform_filter(
alt.FieldOneOfPredicate(field='Energy', oneOf=['Solar Energy'])
).encode(
alt.X('Year:O',title=""),alt.Y('Energy_Consumption(Btu):Q',scale=alt.Scale(type='log'),title="Energy Consumption (Billion Btu)"),
alt.Color('Energy:N',legend=None)).properties(
title="Renewable Energy Consumption Over Time",
width=600,
height=400)
text_labels = line_chart.mark_text(dx=3,dy=-50,align='left', baseline='middle').encode(
x='max(Year):N',
y='average(Energy_Consumption(Btu)):Q',
text='Energy:N',
color=alt.value('black')
)
plot2 = (line_chart+text_labels)
# Layering the plots
alt.layer(plot1,plot2).configure_view(stroke='transparent').configure_axis(
labelAngle=0
)
The visual shows that the Hydropower is the most consumed energy in the state of washington and also the Solar energy consumption has increased exponentially over the period. There is a dip in hydropower and wind energy while the solar energy spiked in 2019.
# Importing the Fuel Consumption Data
file = pd.ExcelFile("/Users/kat/Documents/SeattleU/Fall 23/Data Viz/Project/emission_by_fuel.xlsb", engine='pyxlsb')
fuel = pd.read_excel(file,'FUEL_DATA')
# Data Manipulation
fuel.columns = fuel.iloc[4]
fuel= fuel.iloc[4:,:]
# Filtering the Washington data
fuel = fuel[fuel['State']=='WA']
fuel = fuel[fuel['Reporting Year']!=2010]
fuel = fuel[fuel['Reporting Year']!=2011]
alt.Chart(fuel).mark_bar().encode(
alt.Y('General Fuel Type:N',sort='-x'),alt.X('count(Industry Type (subparts)):O',title="Number of Industries")).properties(
width=300,height=300,title=" Fuel utilized by Industries in Washington State")
The bar graph shows fuel type used by the industries. Most industries in Washington uses Natural Gas.
A Cholorpleth map is created to visualize the distribution of green house gas emisions by counties in the Washington state
# Source
df_sample = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/minoritymajority.csv')
# County Information
County_loc = pd.read_csv('/Users/kat/Documents/SeattleU/Fall 23/Data Viz/Project/simplemaps_uscounties_basicv1.73/uscounties.csv')
# Data Manipulation
County_df = df_sample[df_sample['STNAME']=='Washington']
County_df = County_df.rename(columns={'CTYNAME':'County'})
County_df['County'] = County_df['County'].str.replace(' County', '')
County_df = County_df.iloc[:,[0,1,2]].reset_index(drop=True)
#County_df.info()
print("Washington State has :",County_df['County'].nunique(),"counties")
# Combining the GHG data with the county information
df_1 = greenhouse_data[['County','Total Emissions (MTCO2e)']]
df_1 = df_1.rename(columns={'Total Emissions (MTCO2e)':'tot_em'})
merged_df = pd.merge(County_df,df_1,on="County",how='right')
merged_df['tot_em'] = merged_df['tot_em'].fillna(0)
merged_df['tot_em'] = merged_df['tot_em'].astype('int')
merged_df['FIPS'] = merged_df['FIPS'].fillna(0)
merged_df['FIPS'] = merged_df['FIPS'].astype('int')
# Filtering the required Columns
merged_df = merged_df.iloc[:,[0,2,3]]
merged_df =merged_df.rename(columns={'FIPS':'id'})
County_loc = County_loc[County_loc['state_id']=="WA"]
County_loc = County_loc.iloc[:,[0,3,6,7]]
County_loc= County_loc.rename(columns={'county':'County','county_fips':'id'})
merged_df = pd.merge(County_loc,merged_df,on="County",how='right')
merged_df= merged_df.rename(columns={'id_x':'id'})
merged_df = merged_df.drop(columns="id_y")
#merged_df.to_csv("wa_map_tableau.csv") - Dataset for Tableau
Washington State has : 39 counties
# Creating Chart in Altair
from vega_datasets import data
counties = alt.topo_feature(data.us_10m.url, 'counties')
wa_map = alt.Chart(counties).mark_geoshape(
stroke='black').encode(
color=alt.Color('tot_em:Q',bin=alt.Bin(maxbins=10),sort="descending",
scale=alt.Scale(scheme='orangered'),
title="Total Emission(MTCO2e)"),
text=alt.Text('County:N')
).transform_calculate(state_id = "(datum.id / 1000)|0").transform_filter(
(alt.datum.state_id)==53).transform_lookup(
lookup='id',
from_=alt.LookupData(merged_df, 'id', list(merged_df.columns))
).properties(title="Emission Across Counties In Washington State")
wa_map
Lewis County has the most emission. This could also because of the Power plant located.
The Map is created to understand the renewable energy generation across the country
renew_map = pd.read_csv("https://raw.githubusercontent.com/davidabney9-dev/GHGProject/main/Datasets/Renewable_Map_data.csv")
renew_map.State = renew_map.State.str.strip()
renew_map['Renewable'] = renew_map['Renewable'].str.rstrip("%")
renew_map['Renewable'] = renew_map['Renewable'].astype('int')
# Combining with the sample dataset to get the state unique id:
state_id = data.population_engineers_hurricanes()
state_id = state_id[['state','id']]
state_id = state_id.rename(columns={'state':'State'})
renew_state = pd.merge(state_id,renew_map,on="State",how='right')
# Creating USA chloropleth Map:
counties = alt.topo_feature(data.us_10m.url,feature='states')
chart1 = alt.Chart(counties).mark_geoshape(
stroke='black').encode(
color=alt.Color('Renewable:Q',bin=alt.Bin(maxbins=5),sort="descending",
scale=alt.Scale(scheme='darkgreen',reverse=True),title="Renewable Generation %")
).transform_lookup(
lookup='id',
from_=alt.LookupData(renew_state, 'id', ['Renewable'])
).project(
type='albersUsa'
).properties(title="State Generating Renewable Energy in United States",
width=600,
height=400
)
# Creating bar Chart:
us_state = alt.Chart(renew_map).mark_bar(color="lightgrey").transform_window(
Rank='rank(Renewable)',
sort=[alt.SortField('Renewable', order='descending')],
frame=[None, None]
).transform_calculate(
perct='datum.Renewable /100').transform_filter(
'datum.Rank < 10').encode(
alt.X('perct:Q'),alt.Y('State:N',sort=('-x'))).properties(title="State with High Renewable Energy Generation")
wa_bar = alt.Chart(renew_map).mark_bar(color="green").transform_filter('datum.State=="Washington"').transform_window(
Rank='rank(Renewable)',
sort=[alt.SortField('Renewable', order='descending')],
frame=[None, None]
).transform_calculate(
perct='datum.Renewable /100').transform_filter(
'datum.Rank < 10').encode(
alt.X('perct:Q',axis=alt.Axis(format='%'),title="Contribution of Renewable Energy(%)"),alt.Y('State:N',sort=('-x'))).properties(title="State with High Renewable Energy Generation")
wa_text = wa_bar.mark_text(align='left', baseline='middle', dx=5, dy=-5).encode(
alt.Text('perct:Q',format=".0%"),color=alt.value('black')
)
chart2 =(us_state +wa_bar+wa_text).properties(width=600,height=200)
alt.vconcat(chart1,chart2)
Washington Rank number 3 in renewable energy generation
# Import the power plant data, fuel names, and the rainfall data
powerplants_df = pd.read_csv("https://raw.githubusercontent.com/davidabney9-dev/GHGProject/main/Datasets/powerplant_data.csv")
energytype_df = pd.read_csv("https://raw.githubusercontent.com/davidabney9-dev/GHGProject/main/Datasets/energytype_data.csv")
rainfall_df = pd.read_csv("https://raw.githubusercontent.com/davidabney9-dev/GHGProject/main/Datasets/rainfall_data.csv")
# Merge the power plant data and fuel names
powerplant_df = powerplants_df.merge(energytype_df, how='inner', left_on = 'AER Fuel Type Code', right_on ='AER Fuel Type Code')
# Merge the power plant data and average rain fall
powerplant_and_rain_df = powerplant_df.merge(rainfall_df, how='inner', left_on = 'year', right_on ='year')
# Group the columns and sum up total energy generation
powerplant_fuel_df = powerplant_and_rain_df.groupby(['EnergyType', 'IsRenewable', 'year', 'avg_rainfall'])[['Net Generation (Megawatthours)']].sum()
# Create the percentage changes between years
powerplant_fuel_df['pct_change'] = powerplant_fuel_df['Net Generation (Megawatthours)'].pct_change()
powerplant_fuel_df.head()
| Net Generation (Megawatthours) | pct_change | ||||
|---|---|---|---|---|---|
| EnergyType | IsRenewable | year | avg_rainfall | ||
| Biomass and Biogas | True | 2012 | 36.02 | 225517.046 | NaN |
| 2013 | 24.06 | 263220.536 | 0.167187 | ||
| 2014 | 34.81 | 278705.728 | 0.058830 | ||
| 2015 | 32.21 | 268629.758 | -0.036153 | ||
| 2016 | 34.00 | 270192.239 | 0.005816 |
# Reset the table index because the hierarchy columns can't be graphed
powerplant_fuel_df.reset_index(inplace = True)
# Calculate the generation difference from last year
powerplant_fuel_df['generation_change'] = powerplant_fuel_df['Net Generation (Megawatthours)'] - (powerplant_fuel_df['Net Generation (Megawatthours)']/(1+powerplant_fuel_df['pct_change']))
# Convert the decimals to actual percentage values, so 0.16 = 16%
powerplant_fuel_df['pct_change'] = powerplant_fuel_df['pct_change']*100
powerplant_fuel_df[powerplant_fuel_df['EnergyType'] == 'Solar']
| EnergyType | IsRenewable | year | avg_rainfall | Net Generation (Megawatthours) | pct_change | generation_change | |
|---|---|---|---|---|---|---|---|
| 87 | Solar | True | 2012 | 36.02 | 794.0 | -99.815709 | -430046.01 |
| 88 | Solar | True | 2013 | 24.06 | 762.0 | -4.030227 | -32.00 |
| 89 | Solar | True | 2014 | 34.81 | 729.0 | -4.330709 | -33.00 |
| 90 | Solar | True | 2015 | 32.21 | 790.0 | 8.367627 | 61.00 |
| 91 | Solar | True | 2016 | 34.00 | 727.0 | -7.974684 | -63.00 |
| 92 | Solar | True | 2017 | 35.38 | 481.0 | -33.837689 | -246.00 |
| 93 | Solar | True | 2018 | 28.93 | 1695.0 | 252.390852 | 1214.00 |
| 94 | Solar | True | 2019 | 24.05 | 43728.0 | 2479.823009 | 42033.00 |
| 95 | Solar | True | 2020 | 32.77 | 45872.0 | 4.903037 | 2144.00 |
| 96 | Solar | True | 2021 | 32.68 | 50208.0 | 9.452389 | 4336.00 |
# Electricity Generation by Fuel Type Over each year
linechart = alt.Chart(powerplant_df).mark_line(
point=True,
strokeWidth=3,
opacity=0.7
).transform_aggregate(
TotalGeneration='sum(Net Generation (Megawatthours))',
groupby=['EnergyType', 'year']
).transform_window(
Rank='rank()',
sort=[alt.SortField('TotalGeneration', order='descending')]
).transform_filter(
'datum.Rank <= 50'
).encode(
alt.X('year:N', title = ''),
alt.Y('TotalGeneration:Q', title = 'Electricity Generated (MWh)'),
alt.Color('EnergyType:N',
title = 'Fuel Used',
legend=None,
scale = alt.Scale(
domain = ['Coal ', 'Hydroelectric Conventional', 'Natural Gas', 'Nuclear', 'Wind'],
range=['gray', 'steelblue', 'orange', '#ff9da6', '#2ca02c']))
).properties(
title = 'Electricity Produced By Fuel Type'
)
linetext = alt.Chart(powerplant_df).mark_text(align='left', dx=5, dy=5).transform_aggregate(
TotalGeneration='sum(Net Generation (Megawatthours))',
groupby=['EnergyType', 'year']
).transform_window(
Rank='rank()',
sort=[alt.SortField('TotalGeneration', order='descending')]
).transform_filter(
'datum.Rank <= 50 & datum.year == 2021'
).encode(
alt.X('year:N', title = ''),
alt.Y('TotalGeneration:Q', title = 'Electricity Generated (MWh)'),
alt.Text('EnergyType:N'),
opacity=alt.condition('datum.EnergyType == "Wind"', alt.value(0), alt.value(1))
).properties(
title = 'Electricity Produced By Fuel Type'
)
windtext = alt.Chart(powerplant_df).mark_text(align='left', dx=5, dy=-5).transform_aggregate(
TotalGeneration='sum(Net Generation (Megawatthours))',
groupby=['EnergyType', 'year']
).transform_window(
Rank='rank()',
sort=[alt.SortField('TotalGeneration', order='descending')]
).transform_filter(
'datum.Rank <= 50 & datum.year == 2021'
).encode(
alt.X('year:N', title = ''),
alt.Y('TotalGeneration:Q', title = 'Electricity Generated (MWh)'),
alt.Text('EnergyType:N'),
opacity=alt.condition('datum.EnergyType == "Wind"', alt.value(1), alt.value(0))
).properties(
title = 'Electricity Produced By Fuel Type'
)
plot=linechart + linetext + windtext
plot.configure_axis(
labelAngle=0
).configure_view(stroke='transparent')
This line chart is used to display the amount of electricity produced by the top 5 fuel types used by power plants each year. The amount of electricity generated is a quantitative variable and the most important for comparing the fuel types, so it has been placed on the Y axis. Year, which is a nominal variable, was put along the X axis so the data could be easily read from left to right. Then the colors and a text label were used to represent the fuel type, which is categorical. The legend was hidden, because it wasn’t necessary with the text label. We can see that hydroelectric power is going down and natural gas is going up, so we need to check why this is occuring.
# Not being used
alt.Chart(powerplant_df, title = 'Percentage of Renewable Power Plant Energy').mark_area().encode(
alt.X('year:O', title = ''),
alt.Y('sum(Net Generation (Megawatthours)):Q',
stack = "normalize",
axis = alt.Axis(format=".0%"),
title = 'Electricity Generated (MWh)'),
alt.Color('IsRenewable:N', title = 'Renewable Energy')
).configure_range(
category={'scheme': 'paired'}
).configure_axis(
labelAngle=0
)
alt.Chart(powerplant_df).mark_bar().transform_aggregate(
TotalGeneration='sum(Net Generation (Megawatthours))',
groupby=['EnergyType']
).transform_window(
Rank='rank()',
sort=[alt.SortField('TotalGeneration', order='descending')]
).transform_filter(
'datum.Rank <= 5'
).encode(
alt.X('TotalGeneration:Q', title = 'Electricity Generaged (MWh)'),
alt.Y('EnergyType:N', title = 'Fuel Type', sort=alt.EncodingSortField(
field='TotalGeneration', order='descending')),
alt.Color('EnergyType:N', title = 'Fuel Type',legend=None,
scale = alt.Scale(
domain = ['Coal ', 'Hydroelectric Conventional', 'Natural Gas', 'Nuclear', 'Wind'],
range=['gray', 'steelblue', 'orange', '#ff9da6', '#2ca02c']))
).properties(
title = 'Most Common Fuel Type Used to Produce Electricity'
)
Total electricity generated by each energy source summed between 2012 and 2021. It is sorted in descending order to see the types that are porducing the most electricity, but the bar graph also allows us to compare the resources that produce less energy. Also the energy types are color encoded to match certain features about the resouce.
barcharts = alt.Chart(
powerplant_fuel_df,
title = 'Impacts on Hydroelectric and Natural Gas Power Generation'
).mark_bar().transform_filter(
'datum.year != 2012 & (datum.EnergyType == "Hydroelectric Conventional" | datum.EnergyType == "Natural Gas")'
).encode(
#alt.Y('year:O', title = 'Year'),
alt.Y('pct_change:Q', title='Percent Change (%)'),
alt.X('EnergyType:N', title = '', axis=alt.Axis(labels=False, tickSize = 0)),
alt.Color('EnergyType', title = 'Fuel Type'),
alt.Column('year:O', title = '')
).properties(width = 50, height = 250)
hydro = alt.Chart(powerplant_fuel_df).mark_circle(size = 40).transform_filter(
'datum.year != 2012 & (datum.EnergyType == "Hydroelectric Conventional")'
).encode(
alt.Y('pct_change:Q', title = 'Percent (%) Change',
scale = alt.Scale(domain=[-40,120])),
alt.X('avg_rainfall:Q', title = 'Average Rainfall (in)',
scale = alt.Scale(domain=[20,40])),
).properties(width = 300, height = 250)
hydro_text = alt.Chart(powerplant_fuel_df).mark_text(dx=-20, dy=5).transform_filter(
'datum.EnergyType == "Hydroelectric Conventional" & (datum.year == 2013 | datum.year == 2019)'
).encode(
alt.Y('pct_change:Q'),
alt.X('avg_rainfall:Q'),
alt.Text('year:N')
).properties(width = 300, height = 250)
hydro_plot = hydro + hydro_text + hydro.transform_regression(
'avg_rainfall',
'pct_change'
).mark_line(color = 'gray')
natural_gas = alt.Chart(powerplant_fuel_df).mark_circle(
size = 40, color = "orange"
).transform_filter(
'datum.year != 2012 & datum.EnergyType == "Natural Gas"'
).encode(
alt.Y('pct_change:Q', title = '',
axis=alt.Axis(labels=False, tickSize = 0),
scale = alt.Scale(domain=[-40,120])),
alt.X('avg_rainfall:Q', title = 'Average Rainfall (in)',
scale = alt.Scale(domain=[20,40]))
).properties(width = 300, height = 250)
natural_gas_text = alt.Chart(powerplant_fuel_df).mark_text(dx=-20).transform_filter(
'datum.EnergyType == "Natural Gas" & (datum.year == 2013 | datum.year == 2019)'
).encode(
alt.Y('pct_change:Q'),
alt.X('avg_rainfall:Q'),
alt.Text('year:N')
).properties(width = 300, height = 250)
natural_gas_plot = natural_gas + natural_gas_text + natural_gas.transform_regression(
'avg_rainfall',
'pct_change'
).mark_line(color = 'gray')
scatterplots = alt.hconcat(hydro_plot, natural_gas_plot)
alt.vconcat(barcharts, scatterplots).configure_axis(
labelAngle=0
).configure_range(
category=["steelblue", "orange"]
).configure_title(
anchor='middle'
)
To compare the details of the two most used fuel types, a bar graph of the percentage change from the previous year was paired with scatter plots of the increases based on average precipitation. Both sets of charts use position to display the percentage change variable. The top set of graphs use bars because there is only one quantitative variable being measured over the year. The type of fuel used is encoded using colors that match the fuel type. A blue color for hydroelectric because of its use of water and an orange color was used for encoding natural gas because it is burned. The bottom set of graphs measures two quantitative variables, so it uses a scatter plot along with a regression line to display the relationship.
alt.Chart(powerplant_fuel_df, title = 'How Hydroelectric and Nuclear are Changing').mark_bar().transform_filter(
'datum.year != 2012 & (datum.EnergyType == "Hydroelectric Conventional" | datum.EnergyType == "Nuclear")'
).encode(
alt.Y('generation_change:Q', title='Change from Previous Year (MWh)'),
alt.X('EnergyType:N', title = '', axis=alt.Axis(labels=False, tickSize = 0)),
alt.Color('EnergyType', title = 'Fuel Type'),
alt.Column('year:O', title = '')
).properties(width = 50, height = 200).configure_title(
anchor='middle'
)
Similar comparsion to Hydroelectric and Natural gas with percentage change between the years.
# Shorten Solar Plant name
powerplant_df['Plant Name'] = powerplant_df['Plant Name'].str.replace('Horn Rapids Solar, Storage and Training','Horn Rapids Solar')
totalchart = alt.Chart(powerplant_fuel_df).mark_bar(
color = '#ffc900',
).transform_filter(
'datum.EnergyType == "Solar"'
).encode(
alt.Y('sum(Net Generation (Megawatthours)):Q', title = 'Total Electiricity Generated (MWh)'),
alt.X('year:O', title = '', axis=alt.Axis(tickCount=5))
).properties(width = 350, height = 250)
plantchart = alt.Chart(powerplant_df).mark_bar(
color = '#ffc900',
size = 25
).transform_filter(
'datum.EnergyType == "Solar"'
).encode(
alt.Y('Plant Name:N',
title = '',
sort='-x'),
alt.X('mean(Net Generation (Megawatthours)):Q',
title = 'Average Electricity Generated (MWh)'),
).properties(width = 250, height = 250)
text_left = alt.Chart(powerplant_df).mark_text(dx=-25).transform_filter(
'datum.EnergyType == "Solar"'
).encode(
alt.Y('Plant Name:N', title = '', sort='-x'),
alt.X('mean(Net Generation (Megawatthours)):Q', title = 'Average Electricity Generated (MWh)'),
alt.Text('mean(Net Generation (Megawatthours)):Q'),
opacity=alt.condition('datum["Plant Name"] == "Adams Nielson Solar"', alt.value(1), alt.value(0))
).properties(width = 250, height = 250)
text_right = alt.Chart(powerplant_df).mark_text(dx=25).transform_filter(
'datum.EnergyType == "Solar"'
).encode(
alt.Y('Plant Name:N', title = '', sort='-x'),
alt.X('mean(Net Generation (Megawatthours)):Q', title = 'Average Electricity Generated (MWh)'),
alt.Text('mean(Net Generation (Megawatthours)):Q', ),
opacity=alt.condition('datum["Plant Name"] != "Adams Nielson Solar"', alt.value(1), alt.value(0))
).properties(width = 250, height = 250)
plant_average_chart = plantchart + text_left + text_right
alt.hconcat(plant_average_chart, totalchart).properties(
title='Electricity Generated by Solar Plants'
).configure_axis(
labelAngle=0
).configure_title(
anchor='middle'
)
The solar electricity production was color encoded as yellow to differentiate it from the other energy sources. We have the average output from the 4 solar plants between 2012 and 2021 on the left and then on the right, a bar graph showing the total electricity produced from solar plants each year on the right. Adams Nielson Solar plant opened in October of 2018 and this is the reason for the large increase between 2018 and 2019.
# Pull in the powerplant and emissions combined dataset
combined_df = pd.read_csv("https://raw.githubusercontent.com/davidabney9-dev/GHGProject/main/Datasets/combined_data.csv")
power_ghg_df = combined_df.groupby(['GHGRP Facility ID', 'year', 'AER Fuel Type Code', 'Total reported direct emissions', 'CO2 emissions (non-biogenic) ', 'Methane (CH4) emissions ', 'Nitrous Oxide (N2O) emissions '])[['Total Fuel Consumption Quantity', 'Net Generation (Megawatthours)']].sum()
# Sum up the emissions for the different fuel types
power_ghg_df.reset_index(inplace = True)
new_power_ghg_df = power_ghg_df.pivot(index=['GHGRP Facility ID', 'year', 'Total reported direct emissions', 'CO2 emissions (non-biogenic) ', 'Methane (CH4) emissions ', 'Nitrous Oxide (N2O) emissions '], columns='AER Fuel Type Code', values='Net Generation (Megawatthours)')
# Calculate the percentage of electricity generated for the plant if multiple fuel types were used
new_power_ghg_df = new_power_ghg_df.fillna(0)
new_power_ghg_df['total_generation'] = new_power_ghg_df['COL'] + new_power_ghg_df['DFO'] + new_power_ghg_df['MLG'] + new_power_ghg_df['NG'] + new_power_ghg_df['OOG'] + new_power_ghg_df['OTH'] + new_power_ghg_df['WOC'] + new_power_ghg_df['WOO'] + new_power_ghg_df['WWW']
new_power_ghg_df.head()
# Function to calculate the percentages
def calcPercentage(overall_total, fuel_total):
if overall_total == 0:
return 0
else:
return fuel_total / overall_total
fuel_groups = ['COL', 'DFO', 'MLG', 'NG', 'OOG', 'OTH', 'WOC', 'WOO', 'WWW']
for fuel in fuel_groups:
new_column_name = fuel + "_pct"
new_power_ghg_df[new_column_name] = new_power_ghg_df.apply(lambda x: calcPercentage(x['total_generation'], x[fuel]), axis = 1)
new_power_ghg_df.reset_index(inplace = True)
new_power_ghg_df['co2_per_megawatthour'] = new_power_ghg_df['CO2 emissions (non-biogenic) '] / new_power_ghg_df['total_generation']
coal = alt.Chart(new_power_ghg_df).mark_circle(
size = 50,
color = "gray",
opacity = 0.4
).transform_filter(
'datum.COL_pct > 0.75'
).encode(
alt.X('total_generation:Q'),
alt.Y('Total reported direct emissions:Q')
).properties()
natural_gas = alt.Chart(new_power_ghg_df).mark_circle(
size = 50,
color = "orange",
opacity = 0.4
).transform_filter(
'datum.NG_pct > 0.75'
).encode(
alt.X('total_generation:Q', title = 'Total Generation (MWh)'),
alt.Y('Total reported direct emissions:Q', title = 'Total Emissions (MT)')
).properties(title="Emission Comparison Between Coal and Natural Gas")
coal + natural_gas
This was a graph to show the differences in the emissions generated by coal compared to natural gas. We have two quantitative variable we are display, so we use position for electricity and emissions. The nominal variable for energy type is using a color encoding to display a 3rd variable.
coal = alt.Chart(new_power_ghg_df, title = 'Total Emission Comparison for Coal and Natural Gas').mark_circle(
size = 50,
color = "gray"
).transform_filter(
'datum.COL_pct > 0.75'
).encode(
alt.X('sum(total_generation):Q'),
alt.Y('sum(Total reported direct emissions):Q')
)
natural_gas = alt.Chart(new_power_ghg_df).mark_circle(
size = 50,
color = "orange"
).transform_filter(
'datum.NG_pct > 0.75'
).encode(
alt.X('sum(total_generation):Q', title = 'Total Electricity Generated (MWh)'),
alt.Y('sum(Total reported direct emissions):Q', title = 'Total Emissions Report (Metric Tons)')
)
coal + natural_gas